| Source: ShareWare OnLine 2 / ShareWare OnLine Volume 2 (CMS Software)(1993).iso / util2 / pgp22src.zip / SRC / MC68020.S
| Text file | 1993-03-07 | 6KB | 313 lines
| Fast assembly routines for MC68020 (Sun-3)
| Assumptions:
| Arguments start at sp@(0x4)
| Return value is in d0
| d0/d1/a0/a1 are scratch
| P_SMUL needs MULTUNIT set to "unsigned long" in mpilib.c
| P_DMUL replaces mp_smul and mp_dmul in mpilib.c
|
| 92.9.21 - Tsutomu Shimomura, tsutomu@ucsd.edu
.text
| P_SETP(p) sets the current precision to be p longwords. No-op.
|
| The argument p is ignored: the routine returns immediately without
| reading the stack and without writing any register or memory.
| The arithmetic routines in this file take the operand length from the
| word _global_precision instead.
.proc
.globl _P_SETP
_P_SETP:
| The lines tagged |% are a disabled call to mcount with a per-routine
| 4-byte counter cell in .bss (presumably a profiling hook - confirm
| before re-enabling).
| movl #L2000, a0 |%
| jsr mcount |%
| .bss |%
| .even |%
|L2000: .skip 4 |%
| .text |%
rts
| P_ADDC(*a, *b, c) performs a += b + c (carry). Carry is returned.
|
| Arguments (on the stack):
|   sp@(0x4)  a - destination number, updated in place
|   sp@(0x8)  b - number added to a
|   sp@(0xc)  c - carry in; bit 0 is the carry that is added
| Result in d0: bit 0 is the carry out of the last addxl; bits 31-1 are
| the corresponding bits of c, so the result is 0 or 1 when c is 0 or 1.
| The length in longwords is the word _global_precision.
| Both pointers are moved to the end of their arrays and walked backwards
| with predecrement, so the longword at the highest address is added first.
| Uses d0/d1/a0/a1 as scratch; d2 is saved on the stack and restored.
.proc
.globl _P_ADDC
_P_ADDC:
| The lines tagged |% are a disabled call to mcount (presumably a
| profiling hook - confirm before re-enabling).
| movl #L2001, a0 |%
| jsr mcount |%
| .bss |%
| .even |%
|L2001: .skip 4 |%
| .text |%
movl sp@(0x4), a0 | claim arguments
movl sp@(0x8), a1
movl sp@(0xc), d0
movl d2, sp@- | preserve d2
movw _global_precision, d1 | longword count
movw d1, d2 | save a copy
lslw #2, d1 | longword count -> byte count
addw d1, a0 | adjust array pointers
addw d1, a1
lsrw #1, d1 | compute initial branch offset
andw #0xe, d1 | 2 * (count mod 8): one 2-byte addxl per leftover longword
negw d1 | branch offset in d1
lsrw #3, d2 | 8 longwords/loop; count in d2
asrl #1, d0 | set X if necessary
| Enter the unrolled loop part-way: 0x12 reaches the dbf below, and d1
| backs up from there by one addxl per leftover longword.  The offset
| depends on the size of the eight addxl instructions - do not insert
| or remove instructions between the jmp and the dbf.
jmp pc@(0x12,d1:w)
1:
addxl a1@-, a0@-
addxl a1@-, a0@-
addxl a1@-, a0@-
addxl a1@-, a0@-
addxl a1@-, a0@-
addxl a1@-, a0@-
addxl a1@-, a0@-
addxl a1@-, a0@-
dbf d2, 1b | one further pass of 8 per remaining count; X is left untouched
roxll #1, d0 | move the final X (carry) into bit 0 of the result
movl sp@+, d2 | restore d2
rts
| P_SUBB(*a, *b, c) performs a -= b + c (borrow). Borrow is returned.
|
| Arguments (on the stack):
|   sp@(0x4)  a - destination number, updated in place
|   sp@(0x8)  b - number subtracted from a
|   sp@(0xc)  c - borrow in; bit 0 is the borrow that is subtracted
| Result in d0: bit 0 is the borrow out of the last subxl; bits 31-1 are
| the corresponding bits of c, so the result is 0 or 1 when c is 0 or 1.
| The length in longwords is the word _global_precision.
| Both pointers are moved to the end of their arrays and walked backwards
| with predecrement, so the longword at the highest address is handled first.
| Uses d0/d1/a0/a1 as scratch; d2 is saved on the stack and restored.
.proc
.globl _P_SUBB
_P_SUBB:
| The lines tagged |% are a disabled call to mcount (presumably a
| profiling hook - confirm before re-enabling).
| movl #L2002, a0 |%
| jsr mcount |%
| .bss |%
| .even |%
|L2002: .skip 4 |%
| .text |%
movl sp@(0x4), a0 | claim arguments
movl sp@(0x8), a1
movl sp@(0xc), d0
movl d2, sp@- | preserve d2
movw _global_precision, d1 | longword count
movw d1, d2 | save a copy
lslw #2, d1 | longword count -> byte count
addw d1, a0 | adjust array pointers
addw d1, a1
lsrw #1, d1 | compute initial branch offset
andw #0xe, d1 | 2 * (count mod 8): one 2-byte subxl per leftover longword
negw d1 | branch offset in d1
lsrw #3, d2 | 8 longwords/loop; count in d2
asrl #1, d0 | set X if necessary
| Enter the unrolled loop part-way: 0x12 reaches the dbf below, and d1
| backs up from there by one subxl per leftover longword.  The offset
| depends on the size of the eight subxl instructions - do not insert
| or remove instructions between the jmp and the dbf.
jmp pc@(0x12,d1:w)
1:
subxl a1@-, a0@-
subxl a1@-, a0@-
subxl a1@-, a0@-
subxl a1@-, a0@-
subxl a1@-, a0@-
subxl a1@-, a0@-
subxl a1@-, a0@-
subxl a1@-, a0@-
dbf d2, 1b | one further pass of 8 per remaining count; X is left untouched
roxll #1, d0 | move the final X (borrow) into bit 0 of the result
movl sp@+, d2 | restore d2
rts
| P_ROTL(*a, c) performs a = (a<<1) | c (lo-bit). Hi-bit is returned.
|
| Arguments (on the stack):
|   sp@(0x4)  a - number shifted left by one bit, in place
|   sp@(0x8)  c - bit 0 is shifted into the bottom of a
| Result in d0: bit 0 is the bit shifted out of the top of a; bits 31-1
| are the corresponding bits of c, so the result is 0 or 1 when c is 0 or 1.
| The length in longwords is the word _global_precision.
| The pointer is moved to the end of the array and walked backwards one
| 16-bit word at a time (two roxlw per longword), so the word at the
| highest address is rotated first.
| Uses d0/d1/a0/a1 as scratch; d2 is parked in a1 and restored.
.proc
.globl _P_ROTL
_P_ROTL:
| The lines tagged |% are a disabled call to mcount (presumably a
| profiling hook - confirm before re-enabling).
| movl #L2003, a0 |%
| jsr mcount |%
| .bss |%
| .even |%
|L2003: .skip 4 |%
| .text |%
movl sp@(0x4), a0 | claim arguments
movl sp@(0x8), d0
movl d2, a1 | preserve d2
movw _global_precision, d1 | longword count
movw d1, d2 | save a copy
lslw #2, d1 | longword count -> byte count
addw d1, a0 | adjust array pointer
andw #0x1c, d1 | 4 * (count mod 8): two 2-byte roxlw per leftover longword
negw d1 | branch offset in d1
lsrw #3, d2 | 8 longwords/loop; count in d2
asrl #1, d0 | set X if necessary
| Enter the unrolled loop part-way: 0x22 reaches the dbf below, and d1
| backs up from there by two roxlw per leftover longword.  The offset
| depends on the size of the sixteen roxlw instructions - do not insert
| or remove instructions between the jmp and the dbf.
jmp pc@(0x22,d1:w)
1:
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
roxlw a0@-
dbf d2, 1b | one further pass of 8 longwords per remaining count; X is left untouched
roxll #1, d0 | move the final X (bit shifted out) into bit 0 of the result
movl a1, d2 | restore d2
rts
| P_SMUL(*a, *b, x) performs a += b * x. Pointers are to the LSB.
|
| Arguments (on the stack):
|   sp@(0x4)  a - pointer to the least significant longword of the
|                 accumulator; more significant longwords are at lower
|                 addresses
|   sp@(0x8)  b - pointer to the least significant longword of the
|                 multiplicand, same layout
|   sp@(0xc)  x - 32-bit multiplier
| The word _global_precision gives the number of longwords of b that are
| multiplied.  The same number of longwords of a are accumulated into, and
| the final carry is then STORED (not added) into the next more
| significant longword of a, so a must have room for
| global_precision + 1 longwords.
| When x is 0 the routine returns at once and a is not touched at all.
| No result is returned; d0 is scratch.
| Uses d0/d1/a0/a1 as scratch; d2-d5 are saved on the stack and restored.
|
| global_precision == 1 is handled by skipping the loop.  Without that
| guard the dbf count would start at -1 and the loop would run 65536
| times, running off both arrays.  global_precision == 0 is not supported.
        .proc
        .globl  _P_SMUL
_P_SMUL:
| The lines tagged |% are a disabled call to mcount (presumably a
| profiling hook - confirm before re-enabling).
|       movl    #L2004, a0      |%
|       jsr     mcount          |%
|       .bss                    |%
|       .even                   |%
|L2004: .skip   4               |%
|       .text                   |%
        movl    sp@(0x4), a0            | claim arguments
        movl    sp@(0x8), a1
        movl    sp@(0xc), d1
        tstl    d1                      | horrible kludge to speed multiply by 0
        beq     3f
        moveml  #0x3c00, sp@-           | d2/d3/d4/d5; Sun's optimizer is really *DUMB*
        movw    _global_precision, d5   | longword count; 0 will fail
        subqw   #2, d5                  | first longword not handled in loop
        clrl    d4                      | constant 0, used to fold X into a register
        movl    a1@, d2
        mulul   d1, d3:d2               | d3 is carry
        addl    d2, a0@                 | accumulate
        tstw    d5                      | tst leaves X alone, so the pending carry survives
        blt     2f                      | only one longword?
1:
        movl    a0@-, d0
        addxl   d3, d0                  | accumulate carry and X-bit
        movl    a1@-, d2
        mulul   d1, d3:d2               | d3 is carry
        addxl   d4, d3                  | add X-bit to carry
        addl    d2, d0                  | accumulate
        movl    d0, a0@
        dbf     d5, 1b
2:
        addxl   d4, d3                  | add X-bit to carry
        movl    d3, a0@-                | store the carry above the processed longwords
        moveml  sp@+, #0x3c             | d2/d3/d4/d5; Sun's optimizer is really *DUMB*
3:
        rts
| P_DMUL(*a, *b, *c) performs a = b * c.
|
| Arguments (on the stack; read at 0x28/0x2c/0x30 after nine registers
| have been pushed):
|   a - product, 2 * global_precision longwords, most significant
|       longword at the lowest address; completely overwritten
|   b - multiplicand, global_precision longwords, same layout
|   c - multiplier, global_precision longwords, same layout
| The length is the word _global_precision, which must be at least 2.
| No result is returned; d0 is scratch.
| Uses d0/d1/a0/a1 as scratch; d2-d7/a2-a4 are saved on the stack and
| restored.
|
| Method: leading zero longwords of b and of c are skipped, and one
| leading longword of the product is cleared for each longword skipped.
| The remaining longwords are multiplied schoolbook fashion: the first
| partial product is stored, the others are accumulated.
|
| The loops that consume d7 and d6 are dbf loops that always run at least
| once, so an operand is never treated as shorter than two longwords.
| When every longword of an operand except the last is zero, the dbne
| scan ends with its count at -1; the count is then forced to 0.  Without
| that the dbf loops would start at -1 and run 65536 times, running off
| the arrays.  The scan clears global_precision - 2 product longwords in
| that case, which already matches a two-longword operand.
        .proc
        .globl  _P_DMUL
_P_DMUL:
| The lines tagged |% are a disabled call to mcount (presumably a
| profiling hook - confirm before re-enabling).
|       movl    #L2005, a0      |%
|       jsr     mcount          |%
|       .bss                    |%
|       .even                   |%
|L2005: .skip   4               |%
|       .text                   |%
        moveml  #0x3f38, sp@-           | d2-d7/a2-a4; Sun's optimizer is really *DUMB*
        movl    sp@(0x28), a0           | claim arguments
        movl    sp@(0x2c), a1
        movl    sp@(0x30), a2
        movw    _global_precision, d0
        subqw   #2, d0                  | global_precision - 2
        movl    a0, a4                  | product
        movw    d0, d7                  | count for multiplicand
        movl    a1, a3                  | multiplicand
        bra     2f
1:
        clrl    a4@+                    | one product longword per skipped zero longword
2:
        tstl    a3@+
        dbne    d7, 1b                  | d7 contains effective size of the multiplicand-2
        tstw    d7                      | count ran out (-1)?  tst/clr leave X alone
        bge     3f
        clrw    d7                      | treat the multiplicand as two longwords
3:
        movl    d0, d6                  | count for multiplier (only the low word is used)
        movl    a2, a3                  | multiplier
        bra     2f
1:
        clrl    a4@+                    | one product longword per skipped zero longword
2:
        tstl    a3@+
        dbne    d6, 1b                  | d6 contains effective size of the multiplier-2
        tstw    d6                      | count ran out (-1)?
        bge     3f
        clrw    d6                      | treat the multiplier as two longwords
3:
        addqw   #1, d0                  | global_precision - 1
        lslw    #2, d0                  | byte offset of the last longword; leaves X clear
        addw    d0, a1                  | pointer to LSB of the multiplicand
        addw    d0, a2                  | pointer to LSB of the multiplier
        addw    d0, a0
        addw    d0, a0
        addql   #4, a0                  | pointer to LSB of product - KLUDGE!
| First partial product not handled in loop
| Assumes that the X-bit is clear from the above contortions.
        clrl    d4                      | constant 0, used to fold X into a register
        movl    a0, a3                  | product
        movl    a1, a4                  | multiplicand
        movl    a2@, d1                 | one longword of the multiplier
        movw    d7, d5                  | loop count
        movl    a4@, d2
        mulul   d1, d3:d2               | d3 is carry
        movl    d2, a3@                 | store product
1:
        movl    a4@-, d2
        mulul   d1, d0:d2               | d0 is the new carry
        addxl   d3, d2                  | add previous carry and X-bit
        movl    d0, d3
        movl    d2, a3@-                | store product
        dbf     d5, 1b
        addxl   d4, d3                  | add X-bit to carry
        movl    d3, a3@-                | store the top longword of this partial product
| The other partial products
2:
        movl    a1, a4                  | multiplicand
        movl    a2@-, d1                | another longword of the multiplier
        movw    d7, d5                  | loop count
        movl    a4@, d2
        mulul   d1, d3:d2               | d3 is carry
        addl    d2, a0@-                | accumulate; a0 moves up one longword per pass
        movl    a0, a3                  | product
1:
        movl    a3@-, d0
        addxl   d3, d0                  | accumulate carry and X-bit
        movl    a4@-, d2
        mulul   d1, d3:d2               | d3 is carry
        addxl   d4, d3                  | add X-bit to carry
        addl    d2, d0                  | accumulate
        movl    d0, a3@
        dbf     d5, 1b
        addxl   d4, d3                  | add X-bit to carry
        movl    d3, a3@-                | store the top longword of this partial product
        dbf     d6, 2b
        moveml  sp@+, #0x1cfc           | d2-d7/a2-a4; Sun's optimizer is really *DUMB*
        rts